{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Evaluate a single prediction against ground truth"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sample HTML code\n",
    "pred = '<html><body><table><thead><tr><td><b>Name of algori</b></td><td><b>Notablefeatures</b></td></tr></thead><tbody><tr><td>MACS [23]</td><td>Uses both a control library and local statistics to minimize bias</td></tr><tr><td>SICER [15]</td><td>Designed for detecting diffusely enriched regions; for example, histone modification</td></tr><tr><td>PeakSEQ [24]</td><td>Corrects for reference genome mappability and local statistics</td></tr><tr><td>SISSRs [25]</td><td>High resolution, precise identification of binding-site location</td></tr><tr><td>F-seq [26]</td><td>Uses kernel density estimation</td></tr></tbody></table></body></html>'\n",
    "true = '<html><body><table><thead><tr><td><b>Name of algorithm</b></td><td><b>Notable features</b></td></tr></thead><tbody><tr><td>MACS [23]</td><td>Uses both a control library and local statistics to minimize bias</td></tr><tr><td>SICER [14]</td><td>Designed for detecting diffusely enriched regions; for example, histone modification</td></tr><tr><td>PeakSeq [24]</td><td>Corrects for reference genome mappability and local statistics</td></tr><tr><td>SISSRs [25]</td><td>High resolution, precise identification of binding-site location</td></tr><tr><td>F-seq [26]</td><td>Uses kernel density estimation</td></tr></tbody></table></body></html>'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "TEDS score: 0.9781765018607124\n"
     ]
    }
   ],
   "source": [
    "from metric import TEDS\n",
    "\n",
    "# Create a TEDS evaluator with its default settings, then score the\n",
    "# predicted table (`pred`) against the ground-truth table (`true`).\n",
    "teds = TEDS()\n",
    "score = teds.evaluate(pred, true)\n",
    "print('TEDS score:', score)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Batch evaluation with parallel threads"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import pprint\n",
    "from metric import TEDS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the sample predictions and ground truth; both paths are relative to the\n",
    "# current working directory.\n",
    "# The encoding is pinned to UTF-8 (the encoding JSON is specified to use) instead of\n",
    "# relying on open()'s locale-dependent default, which can mis-decode non-ASCII text.\n",
    "with open('sample_pred.json', encoding='utf-8') as fp:\n",
    "    pred_json = json.load(fp)\n",
    "with open('sample_gt.json', encoding='utf-8') as fp:\n",
    "    true_json = json.load(fp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 19.0/19.0 [00:10<00:00, 1.50s/it]\n",
      "19it [00:00, 112400.25it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'PMC2094709_004_00.png': 1.0,\n",
      " 'PMC2871264_002_00.png': 1.0,\n",
      " 'PMC2915972_003_00.png': 0.9298260149130074,\n",
      " 'PMC3160368_005_00.png': 0.994615695248351,\n",
      " 'PMC3568059_003_00.png': 0.9609420535891124,\n",
      " 'PMC3707453_006_00.png': 0.8538903625110521,\n",
      " 'PMC3765162_003_01.png': 0.9867342100509474,\n",
      " 'PMC3872294_001_00.png': 0.9863636363636363,\n",
      " 'PMC4196076_004_00.png': 0.9958653089334908,\n",
      " 'PMC4219599_004_00.png': 0.6029978075326913,\n",
      " 'PMC4297392_007_00.png': 0.8070175438596492,\n",
      " 'PMC4311460_007_00.png': 0.6576923076923077,\n",
      " 'PMC4357206_002_00.png': 0.9295181638546892,\n",
      " 'PMC4445578_009_01.png': 0.6754965084868096,\n",
      " 'PMC4969833_016_01.png': 1.0,\n",
      " 'PMC5303243_003_00.png': 0.6494374120956399,\n",
      " 'PMC5451934_004_00.png': 0.9978213507625272,\n",
      " 'PMC5755158_010_01.png': 1.0,\n",
      " 'PMC5849724_006_00.png': 0.9653439200120101,\n",
      " 'PMC6022086_007_00.png': 1.0}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# Initialize TEDS object, using 4 parallel threads\n",
    "teds = TEDS(n_jobs=4)\n",
    "# Evaluate\n",
    "scores = teds.batch_evaluate(pred_json, true_json)\n",
    "# Print results\n",
    "pp = pprint.PrettyPrinter()\n",
    "pp.pprint(scores)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
